setwd("C:\\Users\\aniverb\\Documents\\Grad_School\\JHU\\700")

tv=read.table("trades_pivot_2016-11-18.txt", sep="\t", header=T) 
head(tv)
##   ProductName   Maturity       Date    X6    X7    X8    X9   X10   X11
## 1          6E 2016-06-13 2016-05-02  4743  4371 14131 19188 26520 10183
## 2          6E 2016-06-13 2016-05-03  7761  9281 23010 18470 27603 18569
## 3          6E 2016-06-13 2016-05-04  4065 10092 31111 16142 41140 12268
## 4          6E 2016-06-13 2016-05-05 11257  9318 17680 22189 30305 12814
## 5          6E 2016-06-13 2016-05-06  3582  4173 58424 29355 16648  7940
## 6          6E 2016-06-13 2016-05-09  4806  9660 12547  9459 14289  6987
##    X12   X13   X14  X15  X16 X17 X18 DayTradeTotal
## 1 9504  5228  6112 2577 2616   0   0        105173
## 2 8871 10705  7300 3884 2149  34   9        137646
## 3 4785  4985  6424 2846 1784  15   0        135657
## 4 7126  6431  4706 3802 1169   1   0        126798
## 5 5493  6323 10189 4226 1304   1   0        147658
## 6    0     0     0    0    0  16   0         57764
pv=read.table("trades_pivot_vol_2016-11-18.txt", sep="\t", header = T)
head(pv) 
##   ProductName   Maturity       Date    X6    X7     X8     X9    X10   X11
## 1          6E 2016-06-13 2016-05-02 4.195 2.388  5.414 15.202 10.399 2.440
## 2          6E 2016-06-13 2016-05-03 2.845 4.866  5.204  6.455 12.777 3.812
## 3          6E 2016-06-13 2016-05-04 2.709 5.932 11.152  7.811  9.139 4.576
## 4          6E 2016-06-13 2016-05-05 3.966 3.955  4.532  8.688  7.442 3.817
## 5          6E 2016-06-13 2016-05-06 3.432 1.998 19.902  8.070  8.105 2.980
## 6          6E 2016-06-13 2016-05-09 2.251 4.298  6.758  4.963  7.391 3.536
##     X12   X13   X14   X15   X16   X17   X18 DayVolatility
## 1 6.733 3.743 4.343 1.836 5.190    NA    NA        16.844
## 2 6.492 2.912 6.776 1.797 3.144 0.866 0.837        23.703
## 3 2.625 3.157 3.281 2.045 2.589 0.354    NA        12.542
## 4 4.305 3.403 2.657 1.482 1.047    NA    NA        14.243
## 5 2.001 5.503 4.956 1.861 1.606    NA    NA        17.925
## 6    NA    NA    NA    NA    NA 0.635    NA         8.338
tv_pv_comb=cbind(tv[,1:16], pv[,4:16], tv[17], pv[17])
head(tv_pv_comb)
##   ProductName   Maturity       Date    X6    X7    X8    X9   X10   X11
## 1          6E 2016-06-13 2016-05-02  4743  4371 14131 19188 26520 10183
## 2          6E 2016-06-13 2016-05-03  7761  9281 23010 18470 27603 18569
## 3          6E 2016-06-13 2016-05-04  4065 10092 31111 16142 41140 12268
## 4          6E 2016-06-13 2016-05-05 11257  9318 17680 22189 30305 12814
## 5          6E 2016-06-13 2016-05-06  3582  4173 58424 29355 16648  7940
## 6          6E 2016-06-13 2016-05-09  4806  9660 12547  9459 14289  6987
##    X12   X13   X14  X15  X16 X17 X18    X6    X7     X8     X9    X10
## 1 9504  5228  6112 2577 2616   0   0 4.195 2.388  5.414 15.202 10.399
## 2 8871 10705  7300 3884 2149  34   9 2.845 4.866  5.204  6.455 12.777
## 3 4785  4985  6424 2846 1784  15   0 2.709 5.932 11.152  7.811  9.139
## 4 7126  6431  4706 3802 1169   1   0 3.966 3.955  4.532  8.688  7.442
## 5 5493  6323 10189 4226 1304   1   0 3.432 1.998 19.902  8.070  8.105
## 6    0     0     0    0    0  16   0 2.251 4.298  6.758  4.963  7.391
##     X11   X12   X13   X14   X15   X16   X17   X18 DayTradeTotal
## 1 2.440 6.733 3.743 4.343 1.836 5.190    NA    NA        105173
## 2 3.812 6.492 2.912 6.776 1.797 3.144 0.866 0.837        137646
## 3 4.576 2.625 3.157 3.281 2.045 2.589 0.354    NA        135657
## 4 3.817 4.305 3.403 2.657 1.482 1.047    NA    NA        126798
## 5 2.980 2.001 5.503 4.956 1.861 1.606    NA    NA        147658
## 6 3.536    NA    NA    NA    NA    NA 0.635    NA         57764
##   DayVolatility
## 1        16.844
## 2        23.703
## 3        12.542
## 4        14.243
## 5        17.925
## 6         8.338
pvals=c()
prod=c()
products=as.vector(unique(tv_pv_comb$ProductName))
for (product in products){
  tv_pv_combS=subset(tv_pv_comb, ProductName==product)
  maturities=as.vector(unique(tv_pv_combS$Maturity))
  for (maturity in maturities){
    tv_pv_combM=subset(tv_pv_combS, Maturity==maturity)
    if (!(tv_pv_combM$ProductName=="NQ"& tv_pv_combM$Maturity=='2017-06-16') & !(tv_pv_combM$ProductName=="KE"& tv_pv_combM$Maturity=='2016-05-13')){
    trades=as.vector(as.matrix(t(tv_pv_combM[,4:16])))
    priceVol=as.vector(as.matrix(t(tv_pv_combM[,17:29])))
    df=data.frame(trades, priceVol)[order(trades),]
    df=na.omit(df)
    n=nrow(df)
    dfTrim=df[ceiling(.05*n):round(.95*n),]
    dfTrimO=dfTrim[order(dfTrim$priceVol),]
    n2=nrow(dfTrimO)
    dfTrimP=dfTrimO[ceiling(.05*n2):round(.95*n2),]
    plot(dfTrimP, xlab="trade volume hourly", ylab="price volatility hourly", main=paste(product, "Maturity:", maturity), pch=20) 
    daily=tv_pv_combM[,30:31][order(tv_pv_combM$DayTradeTotal),]
    daily=na.omit(daily)
    nD=nrow(daily)
    dailyTrim=daily[ceiling(.05*nD):round(.95*nD),]
    dailyTrimO=dailyTrim[order(dailyTrim$DayVolatility),]
    nD2=nrow(dailyTrimO)
    dailyTrimP=dailyTrimO[ceiling(.05*nD2):round(.95*nD2),]
    plot(dailyTrimP, xlab="trade volume daily", ylab="price volatility daily", main=paste(product, "Maturity:", maturity), pch=20)
    cors=apply(tv_pv_combM, 1, function(x){cor(as.numeric(x[4:16]), as.numeric(x[17:29]), use="na.or.complete")})
    #print(cors)
    days=c(1:length(cors))
    dfCors=data.frame(days, cors=as.vector(cors))
    mod=lm(cors~days, dfCors)
    cor_hat=predict(mod)
    plot(cors, xlab="Day", ylab="correlation coefficient", main=paste(product, "Maturity:", maturity))
    lines(cor_hat, col="red")
    stat=summary(mod)
    stat=summary(mod)
    cof=stat$coefficients
    pval=cof[,4][2]
    pvals=append(pvals, pval, after=length(pvals))
    prod=append(prod, paste(product, maturity), after=length(prod))
    }
  }
}

pvalsO=order(pvals)
corSig=data.frame(prod, pvals)[pvalsO,]
nrow(corSig)
## [1] 147
corSigO=corSig[which(corSig$pvals<.05),]
corSigO
##              prod        pvals
## 65  NQ 2016-12-16 2.950842e-10
## 21  GC 2016-06-28 1.617581e-08
## 82  SI 2016-12-28 2.749536e-07
## 17  ES 2016-12-16 6.315729e-07
## 43  HO 2016-10-31 6.343677e-07
## 11  CL 2016-10-20 3.920537e-06
## 91  YM 2016-12-16 6.082911e-06
## 49  KE 2016-12-14 8.173742e-06
## 3   6E 2016-12-19 1.413740e-05
## 123 ZN 2016-06-21 1.715649e-05
## 25  GC 2016-12-28 2.028279e-05
## 117 ZM 2016-08-12 3.198893e-05
## 57  NG 2016-10-27 3.422380e-05
## 64  NQ 2016-09-16 4.062054e-05
## 116 ZM 2016-07-14 4.166508e-05
## 80  SI 2016-07-27 4.393664e-05
## 90  YM 2016-09-16 4.670113e-05
## 55  NG 2016-08-29 5.676959e-05
## 23  GC 2016-08-29 1.676184e-04
## 58  NG 2016-11-28 2.494257e-04
## 2   6E 2016-09-19 2.609325e-04
## 10  CL 2016-09-20 3.005405e-04
## 62  NG 2017-04-26 3.114433e-04
## 14  CL 2017-01-20 6.315559e-04
## 12  CL 2016-11-21 7.159779e-04
## 44  HO 2016-11-30 8.575602e-04
## 138 ZT 2016-06-30 1.433644e-03
## 75  RB 2017-01-31 1.716298e-03
## 145 ZW 2016-12-14 2.288428e-03
## 108 ZL 2016-07-14 2.351571e-03
## 95  ZB 2016-12-20 4.516551e-03
## 98  ZC 2016-07-14 4.602761e-03
## 66  NQ 2017-03-17 4.771302e-03
## 130 ZQ 2016-08-31 5.776035e-03
## 73  RB 2016-11-30 6.396327e-03
## 76  RB 2017-02-28 7.112443e-03
## 24  GC 2016-10-27 8.093131e-03
## 9   CL 2016-08-22 9.068422e-03
## 18  ES 2017-03-17 9.728344e-03
## 6   CL 2016-05-20 1.003574e-02
## 16  ES 2016-09-16 1.049977e-02
## 72  RB 2016-10-31 1.094336e-02
## 143 ZW 2016-07-14 1.271685e-02
## 103 ZF 2016-06-30 1.283829e-02
## 46  HO 2017-01-31 1.288902e-02
## 40  HO 2016-07-29 1.519699e-02
## 122 ZM 2017-03-14 1.578590e-02
## 35  GE 2016-12-19 1.792763e-02
## 109 ZL 2016-08-12 1.882357e-02
## 38  HO 2016-05-31 2.091298e-02
## 93  ZB 2016-06-21 2.270895e-02
## 47  KE 2016-07-14 2.291505e-02
## 77  RB 2017-03-31 2.495967e-02
## 71  RB 2016-09-30 2.580315e-02
## 96  ZB 2017-03-22 2.852215e-02
## 111 ZL 2016-10-14 3.238554e-02
## 42  HO 2016-09-30 4.151802e-02
## 135 ZQ 2017-01-31 4.445211e-02
set.seed(3-3-17)
u=sort(runif(147))
plot(corSig$pvals, u, ylab="uniform", xlab="P-values")